# config.py
"""
Project configuration: credentials, model/paths, and tunable parameters.

Secrets are read from environment variables (``TAVILY_API_KEY``,
``DEEPSEEK_API_KEY``) when set; the literal fallbacks below keep existing
behavior but SHOULD be rotated and removed — never commit live keys.
"""

import os

# --- API Keys ---
# SECURITY: prefer environment variables. The hardcoded fallbacks preserve
# backward compatibility for existing deployments; rotate these keys and
# drop the defaults once the environment is configured.
TAVILY_API_KEY = os.getenv('TAVILY_API_KEY', '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')
DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY', 'sk-f5f3487896554276aaf657fe9b9661a7')

# --- Model Configuration ---
LLM_MODEL_NAME = 'deepseek-chat'
# NOTE: verify this local embedding-model path exists in your environment.
EMBED_MODEL_PATH = "local:D:/pythonProject17/transformers/model_em/BAAI/bge-small-en-v1.5"

# --- Data Fetching Parameters ---
# Search query sent to Tavily (intentionally kept in Chinese — it is a
# runtime value, not a comment).
SEARCH_QUERY = "有关于llamaindex的内容"
MAX_SEARCH_RESULTS = 20

# --- Data Processing Parameters ---
# Character window used when splitting documents into chunks.
CHUNK_SIZE = 512
# Characters shared between consecutive chunks to preserve context.
CHUNK_OVERLAP = 20

# --- Dataset Generation Parameters ---
# How many synthetic QA pairs to generate from each chunk.
NUM_QUESTIONS_PER_CHUNK = 2

# --- Output File Paths ---
RAW_RAG_DATASET_PATH = './微调数据集_raw.json'
SFT_DATASET_PATH = './sft_conversation_dataset.json'